Author

Carson Freedman

Code
library(tidyverse)
library(here)
library(dplyr)
library(broom)
library(DT)
library(knitr)
library(kableExtra)
Code
# Read the baby-name records from the Week 9 lab folder (path resolved
# relative to the project root by here()) and display them in a DT table.
names <- here("Week9", "Lab9", "StateNames_A.csv") |>
  read_csv()
names |>
  DT::datatable()

1

Code
# Total number of babies named "Allison" for each state and gender.
# Rows are filtered before grouping so only the matching records are
# grouped, and `.groups = "drop"` returns an ungrouped summary instead of
# one that silently stays grouped by State (which also triggers a
# regrouping message from summarize()).
names |>
  filter(Name == "Allison") |>
  group_by(State, Gender) |>
  summarize(Total = sum(Count), .groups = "drop") |>
  kable(format = "html", 
        caption = "Number of babies Named 'Allison' by State") |>
  kable_material(c("striped", "hover"))
Number of babies Named 'Allison' by State
State Gender Total
AK F 232
AL F 1535
AR F 1198
AZ F 1880
CA F 12413
CO F 1594
CT F 1099
DC F 321
DE F 294
FL F 4455
GA F 3257
HI F 183
IA F 1477
ID F 451
IL F 5110
IN F 3067
KS F 1283
KY F 1905
KY M 20
LA F 1209
MA F 2218
MD F 2229
ME F 340
MI F 4014
MN F 2374
MO F 2882
MS F 817
MT F 226
NC F 3435
ND F 285
NE F 807
NH F 412
NJ F 3052
NM F 399
NV F 729
NY F 5747
OH F 5487
OK F 1421
OR F 1186
PA F 4307
RI F 306
SC F 1228
SD F 376
TN F 2488
TX F 10192
UT F 1125
VA F 3220
VT F 135
WA F 1956
WI F 2367
WV F 813
WY F 142

2

Code
# Yearly totals of female babies named "Allison": keep the matching rows,
# then add up Count within each Year (one row per year in the result).
allison_f <- names |>
  filter(Name == "Allison" & Gender == "F") |>
  group_by(Year) |>
  summarize(Total = sum(Count))

3

Code
# Bar chart of the yearly "Allison" totals.
# The x range is set with coord_cartesian() rather than lims(): lims() sets
# scale limits, and any bar whose edges fall outside them is censored and
# dropped with a warning. With the default bar width a bar at Year 2014
# extends to 2014.45, so an upper scale limit of 2014 would remove it.
# coord_cartesian() only zooms the view and keeps every bar.
allison_f |>
  ggplot(mapping = aes(x = Year, y = Total)) +
  geom_col() +
  coord_cartesian(xlim = c(1996, 2014)) +
  labs(x = "Year", y = "", 
       title = "Number of Children Named 'Allison' per Year in the US")

4

Code
# Simple linear regression of the yearly "Allison" total on the year.
linear_model <- lm(Total ~ Year, data = allison_f)

# Tidy coefficient table (term, estimate, std.error, statistic, p.value)
# rendered as a striped, hover-highlighted HTML table.
linear_model |>
  broom::tidy() |>
  kable(format = "html") |>
  kable_material(c("striped", "hover"))
term estimate std.error statistic p.value
(Intercept) 209689.7609 42971.50497 4.879740 0.0001669
Year -101.5191 21.42676 -4.737959 0.0002228

5

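Predicted number of Allisons = 209689.7609 − 101.5191 × Year, i.e. $\hat{y} = 209689.7609 - 101.5191\,x$, where $x$ is the year and $\hat{y}$ is the predicted yearly total of babies named Allison.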

6

Code
# Residuals-versus-fitted plot for the regression: augment() supplies the
# .fitted and .resid columns, and a straight-line smoother is overlaid on
# the points.
broom::augment(linear_model) |>
  ggplot(aes(x = .fitted, y = .resid)) +
  geom_point() +
  stat_smooth(method = "lm")

7

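The resulting linear model, on its own, doesn’t provide enough information to fully determine the trend over time: the fitted slope estimates a decrease of roughly 102 babies named Allison per year, but the residual plot needs to be checked for patterns before concluding that a straight line describes the trend adequately.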

8

Code
# Stacked bar chart of yearly totals for the three spellings
# "Allan", "Allen" and "Alan".
# - Rows are filtered (with %in%) before grouping so only the matching
#   records are grouped.
# - `.groups = "drop"` hands ggplot an ungrouped summary instead of one
#   still grouped by Name.
# - The x range is set with coord_cartesian() rather than lims(): scale
#   limits censor any bar whose edges fall outside them, and with the
#   default bar width the bars at Year 2014 extend to 2014.45, so an upper
#   scale limit of 2014 would drop them. coord_cartesian() only zooms.
names |>
  filter(Name %in% c("Allan", "Allen", "Alan")) |>
  group_by(Name, Year) |>
  summarize(Total = sum(Count), .groups = "drop") |>
  ggplot(mapping = aes(x = Year, y = Total, fill = Name, color = Name)) +
  geom_col() +
  coord_cartesian(xlim = c(1996, 2014))

9

Code
# Counts of each spelling ("Alan", "Allan", "Allen") in California and
# Pennsylvania for the year 2000, one row per state and one column per
# spelling.
# NOTE(review): the hard-coded col.names assume the pivoted columns come out
# in the order Alan, Allan, Allen -- confirm if the name list changes.
names |>
  filter(
    Name %in% c("Allan", "Allen", "Alan"),
    State %in% c("PA", "CA"),
    Year == 2000
  ) |>
  group_by(Name, State) |>
  summarize(Count = sum(Count)) |>
  pivot_wider(names_from = Name, values_from = Count) |>
  kable(
    format = "html",
    col.names = c('State', 'Alans', 'Allans', 'Allens'),
    caption = "Number of Babies Named a Variation of 'Allan' in CA & PA"
  ) |>
  kable_material(c("striped", "hover"))
Number of Babies Named a Variation of 'Allan' in CA & PA
State Alans Allans Allens
CA 584 131 176
PA 51 12 56

10

Code
# Share of each spelling ("Alan", "Allan", "Allen") within California and
# within Pennsylvania for the year 2000. Counts are totalled per
# state/spelling, then divided by the state's total so each row sums to 1.
# NOTE(review): the hard-coded col.names assume the pivoted columns come out
# in the order Alan, Allan, Allen -- confirm if the name list changes.
names |>
  filter(
    Name %in% c("Allan", "Allen", "Alan"),
    State %in% c("PA", "CA"),
    Year == 2000
  ) |>
  count(State, Name, wt = Count) |>
  group_by(State) |>
  mutate(prop = n / sum(n)) |>
  select(State, Name, prop) |>
  pivot_wider(names_from = Name, values_from = prop) |>
  kable(
    format = "html",
    col.names = c('State', 'Alan', 'Allan', 'Allen'),
    caption = "Proportion of Babies Named a Variation of 'Allan' in CA & PA"
  ) |>
  kable_material(c("striped", "hover"))
Proportion of Babies Named a Variation of 'Allan' in CA & PA
State Alan Allan Allen
CA 0.6554433 0.1470258 0.1975309
PA 0.4285714 0.1008403 0.4705882